#!/bin/bash

# Launch the LMCache controller as a background job.
# It is told to listen on localhost:6000; the monitor ports (pull 8300,
# reply 8400) are handed over as a single JSON argument.
# PYTHONHASHSEED is pinned to the same value (123) used for every other
# process started by this script.
controller_args=(
  --host localhost
  --port 6000
  --monitor-ports '{"pull": 8300, "reply": 8400}'
)
PYTHONHASHSEED=123 /opt/venv/bin/lmcache_controller "${controller_args[@]}" &

# Launch the first vLLM server as a background job on port 8010.
# CUDA_VISIBLE_DEVICES exposes GPUs 0 and 1 to this process, and the LMCache
# settings are read from lm1.yaml.
# NOTE(review): no tensor-parallel option is passed, so confirm whether both
# visible GPUs are actually meant to be used by this instance.
vllm_args_first=(
  serve /data1/DeepSeek-R1-Distill-Qwen-32B
  --gpu-memory-utilization 0.8
  --port 8010
  --kv-transfer-config '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}'
)
PYTHONHASHSEED=123 \
UCX_TLS=rc \
CUDA_VISIBLE_DEVICES=0,1 \
LMCACHE_CONFIG_FILE=/data1/jzh/p2p/nixl/lm1.yaml \
  /opt/venv/bin/vllm "${vllm_args_first[@]}" &

# Launch the second vLLM server as a background job on port 8011.
# CUDA_VISIBLE_DEVICES exposes GPUs 2 and 3 to this process, and the LMCache
# settings are read from lm2.yaml.
# NOTE(review): no tensor-parallel option is passed, so confirm whether both
# visible GPUs are actually meant to be used by this instance.
vllm_args_second=(
  serve /data1/DeepSeek-R1-Distill-Qwen-32B
  --gpu-memory-utilization 0.8
  --port 8011
  --kv-transfer-config '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}'
)
PYTHONHASHSEED=123 \
UCX_TLS=rc \
CUDA_VISIBLE_DEVICES=2,3 \
LMCACHE_CONFIG_FILE=/data1/jzh/p2p/nixl/lm2.yaml \
  /opt/venv/bin/vllm "${vllm_args_second[@]}" &

# Stop every background job started by this script and reap it.
# Installed as the INT/TERM handler so that stopping the launcher does not
# leave the services running without a parent.
stop_services() {
  trap - INT TERM # do not re-enter the handler while shutting down
  local pids
  pids=$(jobs -p)
  if [[ -n "$pids" ]]; then
    # Word splitting is intentional: `jobs -p` prints one PID per line.
    # shellcheck disable=SC2086
    kill $pids 2>/dev/null
  fi
  wait
}
trap stop_services INT TERM

# Report the launch before blocking. `wait` returns only after every
# background job has exited, so a message placed after it would be printed at
# shutdown instead of at startup. The message means the processes were
# launched; it does not check that they are ready to serve requests.
echo "All services have been started."

# Block until all background services have exited.
wait